Reading data ..

## [1] 266296     11
## [1] 16123

Roll up the data at a day level and remove observations less than 3 days

## [1] 16071
## [1] 244150      7
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Total number of rows and columns are 244150, 8 and total number of unique patients are 16071

Roll up the data at a week level and remove observations just 1 week

## [1] 16108
## [1] 186956      7
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Merge good list with patient at a day level

Total number of rows and columns are 186956, 7 and total number of unique patients are 16108

Calculate wt gain by taking first entry of the patient and last week prior week 41

Merge Age, height and calculate BMI

Calculate BMI

## [1] 17
# weight gain = T1-T3
weight_gain <- day_level_demo %>% 
  group_by(Dim_Patient)  %>% 
  arrange(Dim_Patient,date) %>% 
  filter((row_number()==1 & trimester== "T1") | (row_number()==n() & trimester== "T3")) %>% 
  mutate(n = n()) %>%
  filter(n>1) %>% arrange(Dim_Patient, trimester) %>%
  summarise(weight_gain = diff(wt_avg))

day_level_demo <- merge(day_level_demo,initial_wt, by= "Dim_Patient", all.x = T)

# create a categorical variable for bmi within and outside recommended range
day_level_demo$bmi_flag <- ifelse(is.na(day_level_demo$weight_gain),NA,
                                  ifelse((day_level_demo$bmi_category=="Underweight") & 
                              (day_level_demo$weight_gain < 40) &  
                                  (day_level_demo$weight_gain > 28), 1,
                              ifelse((day_level_demo$bmi_category=="Normal Weight") &
                                       (day_level_demo$weight_gain > 25) & 
                                           (day_level_demo$weight_gain <35), 1,
                                     ifelse((day_level_demo$bmi_category=="Overweight") &
                                       (day_level_demo$weight_gain > 15) & 
                                           (day_level_demo$weight_gain <25),1,
                                       ifelse((day_level_demo$bmi_category=="Obese") &
                                       (day_level_demo$weight_gain > 11) & 
                                           (day_level_demo$weight_gain <20),1,0)))))

# day_level_demo %>% filter((day_level_demo$bmi_category=="Overweight") &
#                                        (day_level_demo$weight_gain > 15) & 
#                                            (day_level_demo$weight_gain <25) & (Dim_Patient == "ref:Patient/Athena#13122|1966456"))

# calculate wt difference
day_level_demo <- day_level_demo %>% 
  group_by(Dim_Patient)  %>% 
  arrange(Dim_Patient,date) %>%
  mutate(wt_diff = c(0, diff(wt_avg)))

# cross tab
crosstab(day_level_demo, col.vars = "bmi_flag", row.vars = "bmi_category", type = "f")
##               bmi_flag      0      1    Sum
## bmi_category                               
## Normal Weight           85641  27686 113327
## Obese                   44663  13689  58352
## Overweight              49049  18572  67621
## Underweight              2714   1009   3723
## Sum                    182067  60956 243023

Merge week level data

week_level_demo <- merge(week_level, ht[,c("Dim_Patient", "Activated_At",
                                         "Date_of_Birth", "Age_at_Activation_(years)", 
                                         "Customer_Group", "Height_(m)")],
                        by= "Dim_Patient", all.x = TRUE)

# weight gain
week_level_demo <- merge(week_level_demo, initial_wt, by= "Dim_Patient", all.x = T)

# BMI
week_level_demo <- merge(week_level_demo, init_wt[, c("Dim_Patient", "bmi")], by = "Dim_Patient", all.x = T)

week_level_demo$bmi_category <- ifelse(week_level_demo$bmi <18.5, "Underweight", 
                                      ifelse(week_level_demo$bmi<25, "Normal Weight",
                                             ifelse(week_level_demo$bmi<30, "Overweight", 
                                                           "Obese")))


# create a categorical variable for bmi within and outside recommended range
week_level_demo$bmi_flag <- ifelse(is.na(week_level_demo$weight_gain),NA,
                                  ifelse((week_level_demo$bmi_category=="Underweight") & 
                              (week_level_demo$weight_gain < 40) &  
                                  (week_level_demo$weight_gain > 28), 1,
                              ifelse((week_level_demo$bmi_category=="Normal Weight") &
                                       (week_level_demo$weight_gain > 25) & 
                                           (week_level_demo$weight_gain <35), 1,
                                     ifelse((week_level_demo$bmi_category=="Overweight") &
                                       (week_level_demo$weight_gain > 15) & 
                                           (week_level_demo$weight_gain <25), 1,
                                       ifelse((week_level_demo$bmi_category=="Obese") &
                                       (week_level_demo$weight_gain > 11) & 
                                           (week_level_demo$weight_gain <20),1,0)))))


# calculate wt difference
week_level_demo <- week_level_demo %>% 
  group_by(Dim_Patient)  %>% 
  arrange(Dim_Patient,week) %>%
  mutate(wt_diff = c(0, diff(wt_avg)))

#week_level_demo %>% filter((week_level_demo$bmi_category=="Obese"))

#write.csv(week_level_demo, "week_level_demo_6_2.csv")

0.1 Check the variation in BMI recommend range weight gains outside and within

Class counts - 0 is outside bmi range and 1 is within bmi range

## 
##     0     1 
## 12527  3414
##               bmi_flag      0      1    Sum
## bmi_category                               
## Normal Weight           62893  21440  84333
## Obese                   35712  11118  46830
## Overweight              37261  14623  51884
## Underweight              2056    855   2911
## Sum                    137922  48036 185958

Before performing t-test we need to verify if the following t-test assumptions are valid:

Assumption1: Are the two samples independents? Yes

Assumption2: Are the data from each of the 2 groups follow a normal distribution? shapiro.test() to compute Shapiro-Wilk test for each group of samples.

Can’t perform Shapiro test as sample size is large //Shapiro test fails, that means data is not normally distributed, but the sample size is large enough (n //> 30), so we can ignore the distribution of the data and use parametric tests.

Assumption3 - have same variance:

## 
##  F test to compare two variances
## 
## data:  n by bmi_flag
## F = 1.0349, num df = 12526, denom df = 3413, p-value = 0.2127
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.9805347 1.0913305
## sample estimates:
## ratio of variances 
##            1.03488

The p-value of F-test is p = 0.2127. It’s greater than the significance level alpha = 0.05. In conclusion, there is no significant difference between the variances of the two sets of data. Therefore, we can use the classic t-test which assume equality of the two variances

## 
##  Welch Two Sample t-test
## 
## data:  n by bmi_flag
## t = -20.161, df = 5490.7, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -3.357894 -2.762747
## sample estimates:
## mean in group 0 mean in group 1 
##        11.00998        14.07030

T test shows that the mean of the two samples is not same.

0.2 Logistic regression

Use number of observations (as a proxy to engagement) to find if recommended weight gain will be within the range or outside

## 
## Call:
## glm(formula = bmi_flag ~ n, family = binomial(link = "logit"), 
##     data = count_sub_week)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.2555  -0.7016  -0.6105  -0.5745   1.9599  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.850540   0.035902  -51.54   <2e-16 ***
## n            0.044182   0.002278   19.39   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 16560  on 15940  degrees of freedom
## Residual deviance: 16191  on 15939  degrees of freedom
## AIC: 16195
## 
## Number of Fisher Scoring iterations: 4

Frequency distribution of observations

To make the results more interpretable, forming 3 classes of observations - low freq(0-6 weeks), moderate freq (6-15 weeks), high freq (15 onwards).

## 
##     High freq      Low freq Moderate freq 
##          4434          3279          8228
## 
##     0     1 
## 12527  3414
##          obs_categories High freq Low freq Moderate freq   Sum
## bmi_flag                                                      
## 0                            3119     2962          6446 12527
## 1                            1315      317          1782  3414
## Sum                          4434     3279          8228 15941

Now perform logistic regression with the observation classes.

## 
## Call:
## glm(formula = bmi_flag ~ obs_categories, family = binomial(link = "logit"), 
##     data = count_sub_week)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.8388  -0.6987  -0.6987  -0.4509   2.1617  
## 
## Coefficients:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                 -0.86368    0.03288 -26.268   <2e-16 ***
## obs_categoriesLow freq      -1.37104    0.06762 -20.276   <2e-16 ***
## obs_categoriesModerate freq -0.42205    0.04240  -9.955   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 16560  on 15940  degrees of freedom
## Residual deviance: 16074  on 15938  degrees of freedom
## AIC: 16080
## 
## Number of Fisher Scoring iterations: 4

having low freq(less engagement), versus high freq(more engagement), changes the log odds of weight gain within the recommended range by -1.37104 similarly it changes by -0.42205 in case of moderate freq

In other words, log of odds ratio to be within bmi range decreases by 1.37 when frequency counts changes from high class to low class.

Odds ratio:

##                 (Intercept)      obs_categoriesLow freq 
##                   0.4216095                   0.2538422 
## obs_categoriesModerate freq 
##                   0.6557028

To make the results more interpretable, forming 3 classes of observations - low freq(0-4 weeks), moderate freq (4-10 weeks), high freq (10 onwards).

0.3 How many total patients by:

0.3.1 - Practice group

0.3.2 Age

## [1] 65

0.3.3 BMI Categories

0.4 How often did each patient enter weights:

    - Entire population 
     - Practice group 
   - Age bracket (5 years)
   - BMI category (less than 25 ; 25.1 - 29.9 ; 30-34.9 ; 35.0-39.9 ; 40-59.9 ; 60 -over)
   

Entire Population

Practice groups

Age categories

BMI table

0.5 What is the average starting time (I.e Gestational age at first entry ) By:

--same as previous 

0.6 What is the length of involvement (i.e difference of gestational age) By:

-- same as previous 
## Don't know how to automatically pick scale for object of type difftime. Defaulting to continuous.

0.7 Weight difference at the start - finish of each trimester by (may be hard for people with only 1 entry per trimester):

  - Whole group 
  - Practice group
  - Age 
 - BMI group 
 

Whole group

Practice group

Age

0.8 The rate of weight gain in the second / third trimesters? So average lbs / week ? This would be weight difference in T2 divided by week difference in T2. and then the same for T3

The values do not reside in the recommended range of the reference table

Anova in T2 to change if the bmi groups have different means

Frequency distribution of rate of weight gain in T2 and T3

Perform anova to test if the bmi groups have different means for rate of weight gain

## Anova Table (Type II tests)
## 
## Response: rate_of_wtgain
##              Sum Sq    Df F value    Pr(>F)    
## bmi_category  394.7     3  184.33 < 2.2e-16 ***
## Residuals    9436.3 13219                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Anova test shows that there is a significat diff within the bmi groups when rate of weight is used as a response variable. The same is observed in case of T3

## Anova Table (Type II tests)
## 
## Response: rate_of_wtgain
##               Sum Sq   Df F value    Pr(>F)    
## bmi_category    57.2    3  9.7097 2.149e-06 ***
## Residuals    18514.8 9431                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Following table shows the interaction between bmi category and trimester on the rate of weight gain has a significant effect. As well as, bmi category and trimester variables have a significant impact on the rate of weight gain.

## Anova Table (Type II tests)
## 
## Response: rate_of_wtgain
##                         Sum Sq    Df F value    Pr(>F)    
## bmi_category             443.3     3 120.559 < 2.2e-16 ***
## trimester                172.6     1 140.787 < 2.2e-16 ***
## n                        190.4     1 155.352 < 2.2e-16 ***
## bmi_category:trimester    59.0     3  16.055 2.014e-10 ***
## Residuals              27760.8 22649                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Lets build a linear regression model for rate of wt gain as a response variable and bmi category, trimester, age as predictors.

## 
## Call:
## lm(formula = rate_of_wtgain ~ bmi_category + trimester + n + 
##     `Age_at_Activation_(years)`, data = q5)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -21.623  -0.382   0.025   0.429  49.899 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  1.380249   0.034994  39.442  < 2e-16 ***
## bmi_categoryObese           -0.336409   0.018109 -18.577  < 2e-16 ***
## bmi_categoryOverweight      -0.083970   0.017851  -4.704 2.57e-06 ***
## bmi_categoryUnderweight      0.069644   0.060369   1.154    0.249    
## trimesterT3                 -0.176783   0.014936 -11.836  < 2e-16 ***
## n                           -0.010879   0.000907 -11.994  < 2e-16 ***
## `Age_at_Activation_(years)` -0.004743   0.001040  -4.560 5.15e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.108 on 22651 degrees of freedom
## Multiple R-squared:  0.02706,    Adjusted R-squared:  0.0268 
## F-statistic:   105 on 6 and 22651 DF,  p-value: < 2.2e-16

Model shows that bmi categories, trimester, number of observations and age are statistically significant. The interaction between bmi category and trimester is not significant.

The R square value shows how much the model explains the variability around the mean in the response variable. In this case, the R sqaure low ~2.9% Hence the model with the variables bmi categories, trimester, number of observations and age does not fit well with the data.

To understand what does it mean when only category is a significant variable and what to do in that case refer: https://www.listendata.com/2016/07/insignificant-levels-of-categorical-variable.html

Coef can be interpreted as, if the bmi group changes from normal weight to over-weight group the rate of wt gain changes by -0.07 Logsitic regression to predict bmi groups within the recommended rate of weight gain:

## 
##     0     1 
## 19617  3041
##          bmi_category Normal Weight Obese Overweight Underweight   Sum
## bmi_flag                                                              
## 0                              7996  5548       5825         248 19617
## 1                              1759   618        563         101  3041
## Sum                            9755  6166       6388         349 22658
## 
##     High freq      Low freq Moderate freq 
##          3424         11220          8014
##          obs_categories High freq Low freq Moderate freq   Sum
## bmi_flag                                                      
## 0                            2960     9800          6857 19617
## 1                             464     1420          1157  3041
## Sum                          3424    11220          8014 22658
## Anova Table (Type II tests)
## 
## Response: rate_of_wtgain
##                 Sum Sq    Df F value   Pr(>F)    
## obs_categories    70.5     2  28.031 6.94e-13 ***
## Residuals      28496.7 22655                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Anova between rate of wt gain and observation categories show that all the categories have different mean values.

Logistic regression between bmi flag and observation categories show that the observation categories are not the significant variables for the classification model (does not have impact when it predict of the classes)

The coef can be intrepreted as a patient low freq(less engagement), versus high freq(more engagement), changes the log odds of rate of weight gain within the recommended range by -0.07 similarly it changes by 0.07 in case of moderate freq.

## 
## Call:
## glm(formula = bmi_flag ~ obs_categories, family = binomial(link = "logit"), 
##     data = q5)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.5584  -0.5584  -0.5202  -0.5202   2.0332  
## 
## Coefficients:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                 -1.85306    0.04993 -37.113   <2e-16 ***
## obs_categoriesLow freq      -0.07867    0.05744  -1.370    0.171    
## obs_categoriesModerate freq  0.07362    0.05919   1.244    0.214    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 17869  on 22657  degrees of freedom
## Residual deviance: 17856  on 22655  degrees of freedom
## AIC: 17862
## 
## Number of Fisher Scoring iterations: 4

0.9 weight difference from the first observation to the last observation (IN TRIMESTER 3)

by: 
  - same as above 
  
  
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Practice groups

Age

BMI

0.10 Weight difference from last entry in T3 to last entry in PP by:

 - same as above 
 
 

Whole population

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Practice groups

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Age

BMI

0.11 Check if weights difference are statistically significant

Null hypothesis is that the mean of the weight differences is same for all groups

##                  Df  Sum Sq Mean Sq F value Pr(>F)    
## age_category      6    2143   357.1   22.96 <2e-16 ***
## Residuals    241799 3760732    15.6                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Call:
##    aov(formula = weight_diff ~ age_category, data = age)
## 
## Terms:
##                 age_category Residuals
## Sum of Squares          2143   3760732
## Deg. of Freedom            6    241799
## 
## Residual standard error: 3.943746
## Estimated effects may be unbalanced

As the p value is low we reject the null hypothesis, which represents the groups are different from each other. Further we us tukeyhsd to check which groups are statistically different from each other

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = weight_diff ~ age_category, data = age)
## 
## $age_category
##                    diff         lwr         upr     p adj
## 20-24-14-19 -0.19530016 -0.40161002  0.01100969 0.0773966
## 25-29-14-19 -0.38750736 -0.58352133 -0.19149340 0.0000001
## 30-34-14-19 -0.46883161 -0.66360666 -0.27405656 0.0000000
## 35-39-14-19 -0.42692858 -0.62701406 -0.22684310 0.0000000
## 40-44-14-19 -0.32098710 -0.58311919 -0.05885501 0.0056680
## 45-49-14-19 -0.78528212 -1.33981373 -0.23075052 0.0005919
## 25-29-20-24 -0.19220720 -0.28078745 -0.10362695 0.0000000
## 30-34-20-24 -0.27353145 -0.35933532 -0.18772758 0.0000000
## 35-39-20-24 -0.23162841 -0.32888675 -0.13437008 0.0000000
## 40-44-20-24 -0.12568694 -0.32097702  0.06960314 0.4818916
## 45-49-20-24 -0.58998196 -1.11622362 -0.06374030 0.0165013
## 30-34-25-29 -0.08132425 -0.13806951 -0.02457899 0.0004761
## 35-39-25-29 -0.03942121 -0.11233831  0.03349589 0.6863040
## 40-44-25-29  0.06652026 -0.11785961  0.25090013 0.9386147
## 45-49-25-29 -0.39777476 -0.92006587  0.12451635 0.2711895
## 35-39-30-34  0.04190304 -0.02761493  0.11142100 0.5636067
## 40-44-30-34  0.14784451 -0.03521772  0.33090674 0.2064748
## 45-49-30-34 -0.31645051 -0.83827792  0.20537690 0.5562369
## 40-44-35-39  0.10594147 -0.08276110  0.29464404 0.6460787
## 45-49-35-39 -0.35835355 -0.88218627  0.16547918 0.4037367
## 45-49-40-44 -0.46429502 -1.01482182  0.08623178 0.1640505

age group 14-19 and 20-24 are statiscally different from other groups

Similarlly test differences in bmi categories:

##                  Df  Sum Sq Mean Sq F value Pr(>F)    
## bmi_category      3    2948   982.6    63.2 <2e-16 ***
## Residuals    242869 3776042    15.5                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Call:
##    aov(formula = weight_diff ~ bmi_category, data = bmi)
## 
## Terms:
##                 bmi_category Residuals
## Sum of Squares          2948   3776042
## Deg. of Freedom            3    242869
## 
## Residual standard error: 3.943051
## Estimated effects may be unbalanced
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = weight_diff ~ bmi_category, data = bmi)
## 
## $bmi_category
##                                  diff         lwr         upr     p adj
## Obese-Normal Weight       -0.24902295 -0.30069790 -0.19734799 0.0000000
## Overweight-Normal Weight  -0.02775685 -0.07696784  0.02145415 0.4686745
## Underweight-Normal Weight  0.26555931  0.09683658  0.43428205 0.0003071
## Overweight-Obese           0.22126610  0.16398355  0.27854865 0.0000000
## Underweight-Obese          0.51458226  0.34333119  0.68583333 0.0000000
## Underweight-Overweight     0.29331616  0.12279241  0.46383992 0.0000586

Apart from overweight and normal weight, all groups are statistically different.

0.12 Can we get a rate of weight gain (T3-T1) for each group ; and rate of weight loss for PP (PP-T3)

  - Perhaps here we can interpolate for PP (only 6 weeks)???? 
  

Weight Gain:

0.13 We want to know the behavior of outliers (i.e women who gained MORE or LESS than the goodommended weight gain based on BMI) This would be T1 - T3 (so over the whole pregnancy)

  • For underweight MORE than 40 or LESS than 28
  • Normal weight more than 35 ; less than 25
  • Overweight more than 25 ; less than 15
  • Obese more than 20 ; less than 1

BMI underweight

##             Engagemnt
## BMI          bad good
##   bmi_over     8   54
##   bmi_within  10   90
## [1] "difference in proportions"
##     prop 1 
## 0.02903226
## [1] "For patients outside recommended range of BMi, we expect to observe 7 patients with bad engagement for every 55 patients with good engagement "
## [1] 4/27
## [1] "Likewise in the within recommended range og BMI group, we expect to observe 2 patients with bad engagement for every 23 patients with good engagement"
## [1] 1/9
##             odds ratio with 95% C.I.
## BMI          estimate     lower    upper
##   bmi_within 1.000000        NA       NA
##   bmi_over   1.335387 0.4754595 3.639486

Odds of bad engagement are at least 47% higher for the outside goodommended weight gain group.

BMI normal weight

##             Engagemnt
## BMI          bad good
##   bmi_over   175 1583
##   bmi_within 253 2461
## 
##  2-sample test for equality of proportions with continuity correction
## 
## data:  MI
## X-squared = 0.42274, df = 1, p-value = 0.5156
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.0119067  0.0245559
## sample estimates:
##     prop 1     prop 2 
## 0.09954494 0.09322034
## [1] "difference in proportions"
##      prop 1 
## 0.006324598
## [1] "For patients outside recommended range of BMi, we expect to observe 121 patients with bad engagement for every 1637 patients with good engagement "
## [1] 175/1583
## [1] "Likewise in the within recommended range of BMI group, we expect to observe 90 patients with bad engagement for every 1267 patients with good engagement"
## [1] 11/107
##             odds ratio with 95% C.I.
## BMI          estimate    lower    upper
##   bmi_within 1.000000       NA       NA
##   bmi_over   1.075609 0.877076 1.316466

Odds of bad engagement are at least 81% higher for the outside goodommended weight gain group.

BMI over weight

##             Engagemnt
## BMI          bad good
##   bmi_over    97  899
##   bmi_within 161 1340
## 
##  2-sample test for equality of proportions with continuity correction
## 
## data:  MI
## X-squared = 0.52777, df = 1, p-value = 0.4675
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.03487567  0.01513114
## sample estimates:
##     prop 1     prop 2 
## 0.09738956 0.10726183
## [1] "difference in proportions"
##       prop 1 
## -0.009872267
## [1] 97/899
## [1] 161/1340
##             odds ratio with 95% C.I.
## BMI           estimate     lower   upper
##   bmi_within 1.0000000        NA      NA
##   bmi_over   0.8986128 0.6869524 1.17001

Odds of bad engagement are at least 81% higher for the outside goodommended weight gain group.

BMI Obese

##             Engagemnt
## BMI          bad good
##   bmi_over   125  843
##   bmi_within 190 1189
## 
##  2-sample test for equality of proportions with continuity correction
## 
## data:  MI
## X-squared = 0.29548, df = 1, p-value = 0.5867
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.03740641  0.02010887
## sample estimates:
##    prop 1    prop 2 
## 0.1291322 0.1377810
## [1] "difference in proportions"
##       prop 1 
## -0.008648769
## [1] 125/843
## [1] 190/1189
##             odds ratio with 95% C.I.
## BMI           estimate     lower    upper
##   bmi_within 1.0000000        NA       NA
##   bmi_over   0.9283116 0.7271287 1.181602

Weight loss:

Age categories weight gain

Age wt loss

BMI Cateogries

Wt gain

BMI wt loss